/* File 2 of 3 for Table 5 */




#delimit ;
clear all;
local outfile "GWgapr10_indyreg_pca_allobs";
set more off;


di _n "$S_DATE $S_TIME";


 



********************************************************************************;
* making data set of industry-level controls from firm-level data; 
********************************************************************************;

local lab fte_del; 


use if L_hc_t>=5 using GWgap_pr_firm_v4, clear; * firms with head count 5+;

drop INpf_ind_gp* Yyear_*;

notes: Restricted to firms with head count 5+ etc;



* value added per unit of labour (hc);

gen val_ad_pw = (exp(lngo) - exp(lnM))/(L_hc_t);
replace val_ad_pw = val_ad_pw/10000;
label var val_ad_pw "Value added per worker (real $0,000s)";

local varlist "val_ad_pw";

foreach var of varlist `varlist' {;
	di "var: `var'";
	replace `var' = `var'*L_hc_t;
};


save temp_precolpca, replace;

collapse (sum) `varlist' L_hc_t , by(year pf_ind);

save temp_postcol, replace;


use temp_precolpca, clear;

collapse (sd) val_ad_pw_sd = val_ad_pw [iweight = L_hc_t], by(year pf_ind);

merge 1:1 year pf_ind using temp_postcol, keep(master match using) nogen;

label var val_ad_pw_sd "Std dev of (Gross output - M)/head count (hc wgtd)";

foreach var of varlist `varlist' {;
	replace `var' = `var'/L_hc_t;
};


label var val_ad_pw "Value added per worker (hc wgtd)";

save temp_prodybpca, replace;






********************************************************************************;
* Merging data sets together;
********************************************************************************;

use GWgapr10_pfindyr_TL_res_`lab', clear;
rename c_* TLc_*_TL;
rename se_* TLse_*_TL;
rename rr3obs TLrr3obs_TL;

rename TLc_*_TL c_*_TL;
rename TLse_*_TL se_*_TL;
rename TLrr3obs_TL rr3obs_TL;
	
gen pf_ind = ind3;
gen fin_yr = year;

gen ind2 = substr(pf_ind,1,3);
egen ind2_gp = group(ind2);

merge 1:1 year pf_ind using temp_prodybpca, keep(master match) nogen;
gen nzsioc_lvl3 = pf_ind;
merge 1:1 year pf_ind using GWgap_compet_v3, keep(master match using) keepus(*_hc_lg5 empfrac*) 
	gen(newm);


drop c_phi_f_pf_TL - se_delta_wb_TL;

gen med_comp2_hc_lg5 = 1 - low_comp2_hc_lg5 - high_comp2_hc_lg5;
gen med_profity2_hc_lg5 = 1 - low_profity2_hc_lg5 - high_profity2_hc_lg5;

foreach occ in manag tech {;
	gen mrec_`occ'_hc_lg5 = drec_`occ'_hc_lg5 - srec_`occ'_hc_lg5;
	label var mrec_`occ'_hc_lg5 "Some difficulty hiring `occ'";
	gen nrec_`occ'_hc_lg5 = 1 - drec_`occ'_hc_lg5;
	label var nrec_`occ'_hc_lg5 "No difficulty hiring `occ'";
};


label var pf_ind "Productivity industry";

keep pf_ind year c_discrim_f_TL se_discrim_f_TL val_ad_pw_sd vlow_comp2_hc_lg5 low_comp2_hc_lg5 
	med_comp2_hc_lg5 high_comp2_hc_lg5
	empfrac_hc_manag_lg5 empfrac_hc_tech_lg5 empfrac_hc_trad_lg5 empfrac_hc_other_lg5
	srec_manag_hc_lg5 mrec_manag_hc_lg5 
	nrec_manag_hc_lg5 
	srec_tech_hc_lg5 mrec_tech_hc_lg5 nrec_tech_hc_lg5; * KL_rat agefr_lt25_hc agefr_25to39_hc agefr_40to54_hc agefr_55p_hc
	wfe_mq1_hc_izi wfe_mq2_hc_izi wfe_mq3_hc_izi wfe_mq4_hc_izi avg_collec_emp_hc_lg5 profK s_f;

order pf_ind year;


export excel "`outfile'_data.xlsx", sheet("data") sheetrep first(var);
save "`outfile'_data", replace;
